import numpy as np
import pandas as pd
import os
from tqdm import tqdm
from time import sleep
np.set_printoptions(suppress=True)

# ----------------------------------------------------------- definition of feature functions

def _signal_statistics(signals):
    """Return a (rows, 6) array of per-row statistics for a 2-D signal array.

    Columns: 0 peak, 1 root mean square, 2 variance, 3 skewness,
    4 kurtosis, 5 K factor. All moments are population moments (ddof=0),
    normalised by the number of samples of *this* array.
    """
    peak = signals.max(axis=1)
    rms = np.sqrt(np.mean(signals ** 2, axis=1))
    variance = signals.var(axis=1)

    centered = signals - signals.mean(axis=1, keepdims=True)
    std = signals.std(axis=1)
    # A constant row has std == 0 and yields nan/inf here (NumPy warning),
    # which is left visible rather than masked.
    skewness = np.mean(centered ** 3, axis=1) / std ** 3
    kurtosis = np.mean(centered ** 4, axis=1) / std ** 4

    # NOTE(review): the K factor is computed as peak * variance; it is often
    # defined as peak * RMS - confirm which one is intended.
    k_factor = peak * variance

    return np.column_stack([peak, rms, variance, skewness, kurtosis, k_factor])


def statfeaturextraction(path, path1):
    """Extract statistical features from an undamaged/damaged signal pair.

    Both files are read with ``np.loadtxt``; every row is treated as one
    signal and every column as one sample.

    For each row, twelve features are produced:
    columns 0-5 are peak, RMS, variance, skewness, kurtosis and K factor of
    the undamaged signal (``path``); columns 6-11 are the same six features
    of the damaged signal (``path1``). The rows are then concatenated
    row after row into a single feature vector.

    Parameters
    ----------
    path : str
        Text file holding the undamaged signals.
    path1 : str
        Text file holding the damaged signals. It must contain at least as
        many rows as ``path``; extra rows are ignored. The number of samples
        per row may differ from ``path``.

    Returns
    -------
    pandas.DataFrame
        A single-row frame with ``12 * rows`` columns (60 for 5 rows).

    Raises
    ------
    ValueError
        If ``path1`` contains fewer rows than ``path``.
    """
    # ndmin=2 keeps a single-row file two-dimensional.
    data = np.loadtxt(path, ndmin=2)
    dataDamaged = np.loadtxt(path1, ndmin=2)

    n_rows = data.shape[0]
    if dataDamaged.shape[0] < n_rows:
        raise ValueError(
            f"{path1!r} has {dataDamaged.shape[0]} rows but {path!r} has {n_rows}"
        )

    # Each signal is normalised by its own sample count, so the damaged
    # skewness/kurtosis stay correct when the two files differ in length.
    statistical_features = np.hstack(
        [_signal_statistics(data), _signal_statistics(dataDamaged[:n_rows])]
    )

    # Row-major flattening: all 12 features of row 0, then row 1, ...
    return pd.DataFrame(statistical_features.reshape(1, -1))

#######################################################################

# Build the final feature table: one row per seed, holding the seed number,
# its damage level and the statistical features of its signal file(s).
os.chdir(r'D:/desktop/Jupternotebook/Thesis/Database/20') # 20 40 60


def _seed_sort_key(filename):
    """Return the integer in the third '_'-separated field, or None if absent."""
    try:
        return int(filename.split("_")[2])
    except (IndexError, ValueError):
        return None


# Keep only entries whose name carries the integer field used for sorting.
# Anything else (e.g. the CSV written at the end of a previous run, which
# lands in this same directory) would otherwise crash the sort.
o = [entry for entry in os.listdir() if _seed_sort_key(entry) is not None]
# The sort is stable, so files sharing a key keep their os.listdir() order.
# NOTE(review): which of a seed's two files comes first therefore depends on
# the directory listing order - confirm this is the intended pairing.
o.sort(key=_seed_sort_key)
#  len(o) 8242

# ----------------------------------------------------- get all features
# Features, seeds and damage levels are collected in the same pass so the
# three lists always stay aligned row by row.
alldata, seeds, damage = [], [], []
for seed in tqdm(range(1, 5001)):
    s_1 = [s for s in o if (f'seed_{seed}_Damage') in s]
    if len(s_1) == 1:
        # un broken: the single file serves as both inputs, damage level 0
        path = path1 = s_1[0]
        level = '0'
    elif len(s_1) == 2:
        # broken: first file -> `path`, second file -> `path1`
        path, path1 = s_1
        # the damage level is the fifth '_'-separated field of the second
        # file's name, after stripping its 4-character extension
        level = path1[:-4].split('_')[4]
    else:
        # Never fall through with the previous seed's paths: that would
        # silently duplicate its features under the wrong seed.
        print(f'seed {seed}: expected 1 or 2 files, found {len(s_1)} - skipped')
        continue

    if seed % 50 == 0:
        print(seed)
        print(path, path1)

    alldata.append(statfeaturextraction(path, path1))
    seeds.append(seed)
    damage.append(level)

if not alldata:
    raise SystemExit('no seed files found in ' + os.getcwd())

# one row per processed seed (5000 rows x 60 columns for the full data set)
df1 = pd.concat(alldata, ignore_index=True)

# --------------------------------------------------------------------------- add the seeds and damage level information
# The seed column gets an explicit name so it cannot collide with the
# integer label 0 of the first feature column.
df2 = pd.concat([pd.DataFrame({'seed': seeds, 'damage': damage}), df1], axis=1)

# ---------------------------------------- final data (5000*62)
# index=False keeps the row index out of the file, so reading it back does
# not produce an 'Unnamed: 0' column.
df2.to_csv('20dB_final_data.csv', index=False)

